Analyse time-dependent proteome

2. Test burn-in period and steady state

We want to simulate 5 min intervals in quasi-steady state, so we cannot start with an empty cell; we have to allow for a burn-in period to reach steady state and discard timecourses of this period.

In [14]:
# Grid of selected protein timecourses: one subplot per gene in protnames_plot.
fig, ax = plt.subplots(size, size, figsize=(16, 16))

plt.suptitle('Selected protein timecourses', fontsize=18, fontweight='bold')

for i in range(size):
    for j in range(size):
        yseries = results[time]['timecourses'][protnames_plot[i][j]]
        xseries = np.arange(0., float(len(yseries)), 1.)  # one x value per recorded time step
        # oddly, the simulation sometimes 'loses' one data point
        length = min(len(xseries), len(yseries))
        ax[i, j].plot(xseries[:length], yseries[:length])
        ax[i, j].set_xticks(np.arange(min(xseries), max(xseries)+1, 200.0))
        ax[i, j].set_title(protnames_plot[i][j], fontdict={'fontsize': 12, 'fontweight': 'bold'})
        ax[i, j].tick_params(axis='both', which='major', labelsize=10)

Plot increases and check for saturation of "first derivative":

In [15]:
# Burn-in boundary: the last 300 s of each timecourse are the actual
# simulation window, everything before is burn-in to reach steady state.
# NOTE(review): relies on `yseries` left over from the previous cell, i.e.
# assumes all timecourses have the same length — confirm.
burnin = len(yseries) - 300  # 300 s simulation, rest is burn-in
fig, ax = plt.subplots(size, size, figsize=(16, 16))

plt.suptitle('First derivatives of selected protein timecourses', fontsize=18, fontweight='bold')

for i in range(size):
    for j in range(size):
        yseries = results[time]['timecourses'][protnames_plot[i][j]]
        dy = np.gradient(yseries)  # numerical first derivative, one value per time point
        xseries = np.arange(0., float(len(dy)), 1.)
        # weighted histogram: bar height = summed derivative within each bin
        ax[i, j].hist(xseries, bins=10, weights=dy)
        ax[i, j].axvline(burnin, color='red', linestyle='--')  # mark end of burn-in
        ax[i, j].set_xticks(np.arange(min(xseries), max(xseries)+1, 200.0))
        ax[i, j].set_title(protnames_plot[i][j], fontdict={'fontsize': 12, 'fontweight': 'bold'})
        ax[i, j].tick_params(axis='both', which='major', labelsize=10)

3. Plot proteomes at different times

Prevent output from scrolling:

In [80]:
%%javascript
IPython.OutputArea.auto_scroll_threshold = 999;
In [81]:
# Pairwise scatter comparison of proteomes at different absolute times.
times = np.arange(0, 3600, 600)[:4]
#rows = len(times/columns)
for i, tx in enumerate(times):
    #fig, ax = plt.subplots(rows, columns, figsize=(16, 16))
    for ty in times[i+1:]:
        fig =  plt.figure()
        # diagonal reference line: points on it have equal abundance at both times
        plt.plot([1e-1, 1e6-100000], [1e-1, 1e6-100000], color='red', linestyle='-', linewidth=0.5)
        # +300 s: axis labels report the end of each 300 s simulation window
        scatter_dicts(proteome[tx], proteome[ty], xaxislabel="proteome ({} s)".format(tx+300), 
                      yaxislabel="proteome ({} s)".format(ty+300), fontsize=None)
        plt.show()
3531
3535
3564
4612
4727
4776

4. Plot translational impact

First we check whether the relevant genes are in our data set.

In [12]:
len(results[time]['timecourses'].keys())
Out[12]:
3611
In [13]:
histones = ['YDR224C', 'YBL002W', 'YDR225W', 'YBL003C', 'YOL012C', 'YBR009C', 'YNL030W', 'YBR010W', 'YNL031C', 'YPL127C']
In [14]:
# Verify that every histone gene has a simulated timecourse (Python 2 print).
for histone in histones:
    print "Histone {} found in data set: {}".format(histone, histone in results[time]['timecourses'])
Histone YDR224C found in data set: True
Histone YBL002W found in data set: True
Histone YDR225W found in data set: True
Histone YBL003C found in data set: True
Histone YOL012C found in data set: True
Histone YBR009C found in data set: True
Histone YNL030W found in data set: True
Histone YBR010W found in data set: True
Histone YNL031C found in data set: True
Histone YPL127C found in data set: True
In [15]:
TFs = ["YNL027W", "YLR113W", "YMR174W", "YER062C", "YHR084W", "YDL056W", "YLR182W", "YER111C", "YLR118W", "YMR043W"]
In [16]:
# Verify which transcription-factor genes have a simulated timecourse (Python 2 print).
for TF in TFs:
    print "TF {} found in data set: {}".format(TF, TF in results[time]['timecourses'])
TF YNL027W found in data set: True
TF YLR113W found in data set: True
TF YMR174W found in data set: False
TF YER062C found in data set: True
TF YHR084W found in data set: False
TF YDL056W found in data set: True
TF YLR182W found in data set: True
TF YER111C found in data set: True
TF YLR118W found in data set: False
TF YMR043W found in data set: True

Get mRNAs:

In [17]:
# Load the time-dependent transcriptome (pickled dict keyed by time, apparently
# in minutes — keys are 0, 5, ..., 60) and sort it by time into an OrderedDict.
transcriptomes_dict = collections.OrderedDict(sorted(
        pkl.load((open('../parameters/transcriptome_time_dependent.p'))).items()))
In [18]:
transcriptomes_dict.keys()
Out[18]:
[0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60]
In [19]:
absolute_transcript_count = {time: sum(transcriptomes_dict[time].values()) for time in sorted(transcriptomes_dict.keys())}
In [20]:
plt.scatter(absolute_transcript_count.keys(), absolute_transcript_count.values(), marker='o')
Out[20]:
<matplotlib.collections.PathCollection at 0x7f456dd4dd10>

TODO: this plot can surely be made to look nicer.

Generate fold-changes for transcriptome:

In [21]:
time = 30 * 60
In [22]:
print len(transcriptomes_dict[time/60])
print len(transcriptomes_dict[0])
print len(transcriptomes_dict[3000/60])
6651
6651
6651
In [23]:
print transcriptomes_dict.keys()
print len(results[2400]['timecourses'].keys())  # some proteins were not translated (probably no transcripts)
[0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60]
4332

First we try only one pair of times:

In [24]:
time_0 = 0
time_t = 30 * 60  # time always in seconds
In [25]:
def transcriptome_fold_change(time_0, time_t):
    """Return {gene: log2 fold change} of transcript counts between two times.

    Both times are given in seconds; transcriptomes_dict is keyed by minutes,
    hence the floor division by 60.  Genes missing at time_t, or with a zero
    count at either time (log2 undefined/infinite), are skipped.
    """
    # `//` floor-divides on both Python 2 and 3; the original `/` only
    # yields an integer key under Python 2.
    counts_0 = transcriptomes_dict[time_0 // 60]
    counts_t = transcriptomes_dict[time_t // 60]
    fold_changes = {}
    for gene in counts_0:
        if gene in counts_t and counts_0[gene] != 0 and counts_t[gene] != 0:
            fold_changes[gene] = np.log2(1.0 * counts_t[gene] / counts_0[gene])
    return fold_changes
In [26]:
len(transcriptome_fold_change(time_0, time_t))
Out[26]:
3567
In [27]:
def proteome_fold_change(time_0, time_t):
    """Return {gene: log2 fold change} of final protein counts between two times (s)."""
    proteins_0 = results[time_0]['timecourses']
    proteins_t = results[time_t]['timecourses']
    # final abundance = last entry of each timecourse; skip genes absent at
    # time_t or with a zero count at either end (log2 undefined/infinite)
    return {
        gene: np.log2(1.0 * proteins_t[gene][-1] / proteins_0[gene][-1])
        for gene in proteins_0
        if gene in proteins_t
        and proteins_0[gene][-1] != 0
        and proteins_t[gene][-1] != 0
    }
In [28]:
len(proteome_fold_change(time_0, time_t))
Out[28]:
3615
In [29]:
def scatter_dicts_interactive(dict1,  dict2, xaxislabel=None, yaxislabel=None, fontsize=None):
    """Interactive bokeh scatter plot of the values shared by two dicts.

    One point per key present in both dicts (x from dict1, y from dict2);
    hovering over a point shows the key.  `fontsize` is accepted for
    interface compatibility but currently unused.
    """
    from bokeh.plotting import figure, show, ColumnDataSource
    from bokeh.models import HoverTool
    from bokeh.io import output_notebook

    shared_keys = list(set(dict1) & set(dict2))  # keys present in both dicts
    x_vals = [dict1[k] for k in shared_keys]
    y_vals = [dict2[k] for k in shared_keys]

    plt.close()  # close any dangling matplotlib figure before switching to bokeh
    output_notebook()

    source = ColumnDataSource(data=dict(x=x_vals, y=y_vals, desc=shared_keys, ))

    # tooltip shows only the key (e.g. the gene name)
    hover = HoverTool(tooltips=[("", "@desc")])

    # axis ranges padded by a factor 0.8 on both ends
    p = figure(plot_width=600, plot_height=600, tools=[hover, "wheel_zoom,box_zoom,reset"],
               x_range=[0.8 * min(x_vals), max(x_vals) / 0.8],
               y_range=[0.8 * min(y_vals), max(y_vals) / 0.8],
               title="",)
    p.xaxis.axis_label = xaxislabel
    p.yaxis.axis_label = yaxislabel
    p.xaxis.axis_label_text_font_size = "10pt"
    p.yaxis.axis_label_text_font_size = "10pt"
    p.scatter('x', 'y', size=5, source=source, fill_color=None, line_alpha=0.5, line_color='green')

    show(p)
In [30]:
# Interactive comparison: transcript fold change vs. protein fold change
# for every ordered pair of times.
times = [0, 300, 600, 900]
for i, tx in enumerate(times):
    #fig, ax = plt.subplots(rows, columns, figsize=(16, 16))
    for ty in times[i+1:]:
        scatter_dicts_interactive(transcriptome_fold_change(tx, ty), proteome_fold_change(tx, ty),
                                  "transcript abundance (fold change) ("+str(tx)+" s to "+str(ty)+" s)", 
                                  "proteome abundance (fold change) ("+str(tx)+" s to "+str(ty)+" s)")
Loading BokehJS ...
Loading BokehJS ...
Loading BokehJS ...
Loading BokehJS ...
Loading BokehJS ...
Loading BokehJS ...

5. Analysis for WCM_GRN (abundant periodic genes found by Katja)

In [31]:
genes_shortlist = {'HHF1': 'YBR009C', 'HHF2': 'YNL030W', 'HHT1': 'YBR010W', 'HHT2': 'YNL031C', 'HTB1': 'YDR224C', 
                   'HTB2': 'YBL002W', 'PMA1': 'YGL008C', 'RNR1': 'YER070W', 'RNR2': 'YJL026W', 'RNR3': 'YIL066C', 
                   'RNR4': 'YGR180C'}
In [32]:
translation_rate_shortlist = {}

# For each shortlist gene, compute the mean translation rate (proteins/s)
# over the final 300 s of each simulation window; genes without a
# timecourse at a given time get rate 0.
for (genecommon, gene) in genes_shortlist.items():
    translation_rate_shortlist[genecommon] = []
    for time in np.arange(0, 3600, 300):
        if gene in results[time]['timecourses']:
            # protein count gained over the last 300 s, divided by 300 s
            rate = float(results[time]['timecourses'][gene][-1]-results[time]['timecourses'][gene][-301])/300.
        else:
            rate = 0.
        translation_rate_shortlist[genecommon].append(rate)
In [33]:
translation_ratedf = pd.DataFrame(translation_rate_shortlist, index=np.arange(0, 3600, 300))
In [34]:
pl = translation_ratedf.plot()
pl.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
pl.set_xlabel("time[s]")
pl.set_ylabel("proteins/s")
Out[34]:
<matplotlib.text.Text at 0x7f456cce2ad0>
In [35]:
translation_ratedf
Out[35]:
HHF1 HHF2 HHT1 HHT2 HTB1 HTB2 PMA1 RNR1 RNR2 RNR3 RNR4
0 0.153333 0.516667 0.193333 0.666667 1.033333 0.400000 0.423333 0.300000 0.790000 0.000000 1.266667
300 0.106667 0.603333 0.130000 1.103333 1.336667 0.946667 0.600000 1.223333 0.950000 0.000000 1.200000
600 0.426667 1.820000 0.446667 1.996667 3.036667 3.426667 0.830000 2.166667 1.370000 0.060000 1.466667
900 2.233333 3.040000 1.690000 3.680000 6.900000 7.673333 0.520000 1.453333 1.620000 0.073333 1.610000
1200 4.280000 4.166667 3.060000 4.773333 11.463333 10.573333 0.890000 0.716667 1.486667 0.040000 1.983333
1500 3.933333 3.796667 1.970000 5.576667 8.586667 10.276667 1.376667 0.400000 1.820000 0.023333 1.183333
1800 2.366667 2.023333 0.986667 3.410000 6.116667 7.096667 1.210000 0.233333 1.756667 0.013333 2.146667
2100 1.313333 1.196667 0.586667 2.190000 3.753333 4.066667 1.576667 0.196667 1.640000 0.000000 1.730000
2400 0.526667 0.960000 0.260000 1.386667 1.920000 2.486667 1.126667 0.093333 1.360000 0.000000 1.276667
2700 0.380000 0.583333 0.183333 0.776667 0.953333 1.366667 1.716667 0.146667 1.246667 0.000000 1.636667
3000 0.193333 0.626667 0.163333 0.886667 0.590000 1.000000 1.236667 0.226667 0.710000 0.000000 0.870000
3300 0.440000 0.543333 0.200000 0.976667 1.160000 1.143333 1.973333 0.280000 0.793333 0.000000 1.400000
In [36]:
# Transcriptome as a DataFrame: rows = time points, columns = genes.
transcriptome_df = pd.DataFrame(transcriptomes_dict).transpose()
# drop the column labelled 0 — presumably a spurious entry in the pickled
# data (6651 -> 6650 columns); TODO confirm against the input file
del transcriptome_df[0]
transcriptome_df.index = np.arange(0, 3601, 300)  # 13 time points, 0..3600 s
transcriptome_df
Out[36]:
Q0010 Q0017 Q0032 Q0045 Q0050 Q0055 Q0060 Q0065 Q0070 Q0075 ... YPR196W YPR197C YPR198W YPR199C YPR200C YPR201W YPR202W YPR203W YPR204C-A YPR204W
0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
300 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
600 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 1 0 0 0 0 0
900 0 0 0 0 0 0 0 0 0 0 ... 0 0 2 1 0 0 0 0 0 0
1200 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 1 1 0 0 0 0 0
1500 0 0 0 0 0 0 0 0 0 0 ... 1 0 2 1 0 0 0 0 0 0
1800 0 0 0 0 0 0 0 0 0 0 ... 0 0 2 2 0 0 0 0 0 0
2100 0 0 0 0 0 0 0 0 0 0 ... 0 0 2 1 0 0 0 0 0 0
2400 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 1 0 0 0 0 0 0
2700 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 1 0 0 0 0 0 0
3000 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3300 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 2 0 0 0 0 0 0
3600 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 1 0 0 0 0 0 0

13 rows × 6650 columns

In [37]:
transcriptome_df_short = transcriptome_df[transcriptome_df.index<3600][genes_shortlist.values()]
In [38]:
transcriptome_df_short.columns = genes_shortlist.keys()
transcriptome_df_short
Out[38]:
HTB2 HTB1 HHF1 HHF2 PMA1 RNR2 RNR3 HHT2 RNR1 RNR4 HHT1
0 2 4 2 7 3 5 0 8 3 7 2
300 4 5 1 8 4 6 0 13 12 7 2
600 27 18 10 50 10 16 1 55 46 15 12
900 58 41 55 77 6 18 2 97 29 15 42
1200 77 65 106 107 11 16 1 119 14 19 70
1500 113 68 152 152 24 30 1 232 12 17 81
1800 75 48 91 81 22 29 1 141 6 29 40
2100 44 28 47 46 28 27 0 81 5 23 23
2400 12 8 8 15 8 10 0 20 1 8 4
2700 7 4 6 9 12 9 0 11 2 10 3
3000 4 2 2 6 7 4 0 9 2 4 2
3300 6 5 6 8 15 6 0 16 3 9 3
In [39]:
translation_rate_per_mrna = translation_ratedf/transcriptome_df_short
In [40]:
pl = translation_rate_per_mrna.plot()
pl.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
pl.set_xlabel("time[s]")
pl.set_ylabel("proteins/mRNA/s")
Out[40]:
<matplotlib.text.Text at 0x7f456cc706d0>

Translation efficiency goes down in S phase (at 1500 s) because something (ribosomes, tRNA?) becomes limiting

In [41]:
translation_rate_per_mrna
Out[41]:
HHF1 HHF2 HHT1 HHT2 HTB1 HTB2 PMA1 RNR1 RNR2 RNR3 RNR4
0 0.076667 0.073810 0.096667 0.083333 0.258333 0.200000 0.141111 0.100000 0.158000 NaN 0.180952
300 0.106667 0.075417 0.065000 0.084872 0.267333 0.236667 0.150000 0.101944 0.158333 NaN 0.171429
600 0.042667 0.036400 0.037222 0.036303 0.168704 0.126914 0.083000 0.047101 0.085625 0.060000 0.097778
900 0.040606 0.039481 0.040238 0.037938 0.168293 0.132299 0.086667 0.050115 0.090000 0.036667 0.107333
1200 0.040377 0.038941 0.043714 0.040112 0.176359 0.137316 0.080909 0.051190 0.092917 0.040000 0.104386
1500 0.025877 0.024978 0.024321 0.024037 0.126275 0.090944 0.057361 0.033333 0.060667 0.023333 0.069608
1800 0.026007 0.024979 0.024667 0.024184 0.127431 0.094622 0.055000 0.038889 0.060575 0.013333 0.074023
2100 0.027943 0.026014 0.025507 0.027037 0.134048 0.092424 0.056310 0.039333 0.060741 NaN 0.075217
2400 0.065833 0.064000 0.065000 0.069333 0.240000 0.207222 0.140833 0.093333 0.136000 NaN 0.159583
2700 0.063333 0.064815 0.061111 0.070606 0.238333 0.195238 0.143056 0.073333 0.138519 NaN 0.163667
3000 0.096667 0.104444 0.081667 0.098519 0.295000 0.250000 0.176667 0.113333 0.177500 NaN 0.217500
3300 0.073333 0.067917 0.066667 0.061042 0.232000 0.190556 0.131556 0.093333 0.132222 NaN 0.155556
In [42]:
translation_rate_per_mrna.to_csv('translation_rate_per_mrna.csv')

Comparison of translation_rate_per_mrna with initiation probabilities:

In [43]:
init_rates = pkl.load(open("../parameters/init_rates_enhanced_median.p", "rb"))
init_rates_common = {gene: init_rates[genes_shortlist[gene]]*1e6 for gene in genes_shortlist}
In [44]:
scatter_dicts(dict(translation_rate_per_mrna.ix[3300]), init_rates_common, xaxislabel="translation rate per transcript", 
              yaxislabel="initiation rate * 1E6", lws=False, logx=False, logy=False)
11

Translation rate is practically proportional to initiation rate.

In [45]:
init_rates_common
Out[45]:
{'HHF1': 1.291871859830537,
 'HHF2': 1.291871859830537,
 'HHT1': 1.291871859830537,
 'HHT2': 1.291871859830537,
 'HTB1': 8.580451211929118,
 'HTB2': 5.738927640019162,
 'PMA1': 3.2253823872000513,
 'RNR1': 1.7477148350729443,
 'RNR2': 3.4541884421416924,
 'RNR3': 1.1468920259159536,
 'RNR4': 4.039182074735152}

6. Translational efficiency over time (like 5, but at genome-scale)

In [46]:
# Genome-scale version of the shortlist computation above.
translation_rate_shortlist = {}
# NOTE(review): keys()[1:] drops the first key in Python 2 dict order —
# presumably to skip the spurious 0 entry seen earlier; confirm which
# key is actually dropped.
genes = transcriptomes_dict[0].keys()[1:]

for gene in genes:
    translation_rate_shortlist[gene] = []
    for time in np.arange(0, 3600, 300):
        if gene in results[time]['timecourses']:
            # protein count gained over the last 300 s, divided by 300 s
            rate = float(results[time]['timecourses'][gene][-1]-results[time]['timecourses'][gene][-301])/300.
        else:
            rate = 0.
        translation_rate_shortlist[gene].append(rate)

translation_ratedf = pd.DataFrame(translation_rate_shortlist, index=np.arange(0, 3600, 300))

Translatome:

In [47]:
translation_ratedf
Out[47]:
Q0010 Q0017 Q0032 Q0045 Q0050 Q0055 Q0060 Q0065 Q0070 Q0075 ... YPR196W YPR197C YPR198W YPR199C YPR200C YPR201W YPR202W YPR203W YPR204C-A YPR204W
0 0 0 0 0 0 0 0 0 0 0 ... 0.00 0 0.000000 0.000000 0.000000 0 0 0 0 0
300 0 0 0 0 0 0 0 0 0 0 ... 0.00 0 0.103333 0.000000 0.000000 0 0 0 0 0
600 0 0 0 0 0 0 0 0 0 0 ... 0.00 0 0.050000 0.000000 0.036667 0 0 0 0 0
900 0 0 0 0 0 0 0 0 0 0 ... 0.00 0 0.096667 0.013333 0.000000 0 0 0 0 0
1200 0 0 0 0 0 0 0 0 0 0 ... 0.00 0 0.033333 0.020000 0.030000 0 0 0 0 0
1500 0 0 0 0 0 0 0 0 0 0 ... 0.03 0 0.053333 0.010000 0.000000 0 0 0 0 0
1800 0 0 0 0 0 0 0 0 0 0 ... 0.00 0 0.046667 0.016667 0.000000 0 0 0 0 0
2100 0 0 0 0 0 0 0 0 0 0 ... 0.00 0 0.040000 0.006667 0.000000 0 0 0 0 0
2400 0 0 0 0 0 0 0 0 0 0 ... 0.00 0 0.086667 0.030000 0.000000 0 0 0 0 0
2700 0 0 0 0 0 0 0 0 0 0 ... 0.00 0 0.063333 0.023333 0.000000 0 0 0 0 0
3000 0 0 0 0 0 0 0 0 0 0 ... 0.00 0 0.000000 0.000000 0.000000 0 0 0 0 0
3300 0 0 0 0 0 0 0 0 0 0 ... 0.00 0 0.056667 0.070000 0.000000 0 0 0 0 0

12 rows × 6650 columns

Is the first row identically zero?

In [48]:
translation_ratedf.ix[0].describe()
Out[48]:
count    6650.000000
mean        0.275242
std         1.335686
min         0.000000
25%         0.000000
50%         0.040000
75%         0.113333
max        36.263333
Name: 0, dtype: float64

No it isn't. (Because the 0 refers to the interval [0 s, 300 s] etc.)

Transcriptome:

In [49]:
transcriptome_df = pd.DataFrame(transcriptomes_dict).transpose()
del transcriptome_df[0]
transcriptome_df.index = np.arange(0, 3601, 300)
transcriptome_df
Out[49]:
Q0010 Q0017 Q0032 Q0045 Q0050 Q0055 Q0060 Q0065 Q0070 Q0075 ... YPR196W YPR197C YPR198W YPR199C YPR200C YPR201W YPR202W YPR203W YPR204C-A YPR204W
0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
300 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
600 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 0 1 0 0 0 0 0
900 0 0 0 0 0 0 0 0 0 0 ... 0 0 2 1 0 0 0 0 0 0
1200 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 1 1 0 0 0 0 0
1500 0 0 0 0 0 0 0 0 0 0 ... 1 0 2 1 0 0 0 0 0 0
1800 0 0 0 0 0 0 0 0 0 0 ... 0 0 2 2 0 0 0 0 0 0
2100 0 0 0 0 0 0 0 0 0 0 ... 0 0 2 1 0 0 0 0 0 0
2400 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 1 0 0 0 0 0 0
2700 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 1 0 0 0 0 0 0
3000 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3300 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 2 0 0 0 0 0 0
3600 0 0 0 0 0 0 0 0 0 0 ... 0 0 1 1 0 0 0 0 0 0

13 rows × 6650 columns

Divide one by the other to get the translational efficiency per mRNA:

In [50]:
translation_rate_per_mrna = translation_ratedf/transcriptome_df
translation_rate_per_mrna
Out[50]:
Q0010 Q0017 Q0032 Q0045 Q0050 Q0055 Q0060 Q0065 Q0070 Q0075 ... YPR196W YPR197C YPR198W YPR199C YPR200C YPR201W YPR202W YPR203W YPR204C-A YPR204W
0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
300 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN 0.103333 NaN NaN NaN NaN NaN NaN NaN
600 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN 0.050000 NaN 0.036667 NaN NaN NaN NaN NaN
900 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN 0.048333 0.013333 NaN NaN NaN NaN NaN NaN
1200 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN 0.033333 0.020000 0.030000 NaN NaN NaN NaN NaN
1500 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 0.03 NaN 0.026667 0.010000 NaN NaN NaN NaN NaN NaN
1800 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN 0.023333 0.008333 NaN NaN NaN NaN NaN NaN
2100 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN 0.020000 0.006667 NaN NaN NaN NaN NaN NaN
2400 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN 0.086667 0.030000 NaN NaN NaN NaN NaN NaN
2700 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN 0.063333 0.023333 NaN NaN NaN NaN NaN NaN
3000 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3300 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN 0.056667 0.035000 NaN NaN NaN NaN NaN NaN
3600 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

13 rows × 6650 columns

In [51]:
translation_rate_per_mrna.dropna(axis=1, how='all', inplace=True)
In [52]:
pl = translation_rate_per_mrna.ix[:, :18].plot()
pl.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
pl.set_xlabel("time[s]")
pl.set_ylabel("proteins/mRNA/s")
#pl.set_yscale("log")
Out[52]:
<matplotlib.text.Text at 0x7f456e446890>

Looks like the suppressed translation efficiency is not just limited to histones.

In [53]:
translation_rate_per_mrna
Out[53]:
YAL001C YAL002W YAL003W YAL005C YAL007C YAL008W YAL009W YAL010C YAL011W YAL012W ... YPR187W YPR188C YPR189W YPR190C YPR191W YPR192W YPR196W YPR198W YPR199C YPR200C
0 NaN 0.006667 0.228596 0.075057 0.093333 0.166667 0.040000 NaN NaN 0.189583 ... 0.126667 0.080000 0.063333 0.063333 0.093333 NaN NaN NaN NaN NaN
300 0.043333 NaN 0.235556 0.082821 0.113333 0.156667 NaN NaN NaN 0.188788 ... 0.135000 0.093333 0.066667 0.063333 0.123333 NaN NaN 0.103333 NaN NaN
600 0.036667 0.013333 0.131528 0.037312 0.057222 0.081667 0.015000 NaN 0.023333 0.106462 ... 0.063077 0.036667 0.030000 0.033333 0.070000 NaN NaN 0.050000 NaN 0.036667
900 0.036667 0.016667 0.144314 0.036879 0.058000 0.076667 0.016667 0.033333 0.031667 0.108634 ... 0.065128 0.048000 0.036667 0.035000 0.076667 NaN NaN 0.048333 0.013333 NaN
1200 0.033333 0.003333 0.146928 0.040733 0.054000 0.078333 0.025000 NaN 0.036667 0.110909 ... 0.074444 0.060000 0.020000 0.040667 0.058333 NaN NaN 0.033333 0.020000 0.030000
1500 0.010000 0.008333 0.095000 0.024418 0.035000 0.057500 0.014444 NaN 0.016667 0.076914 ... 0.042222 0.036667 0.020000 0.014444 0.033333 0.013333 0.03 0.026667 0.010000 NaN
1800 0.018333 0.010000 0.099200 0.025950 0.038750 0.075000 0.021333 0.013333 0.028333 0.076159 ... 0.044444 0.033333 0.019167 0.025556 0.043333 NaN NaN 0.023333 0.008333 NaN
2100 0.020000 0.013333 0.101763 0.026599 0.043333 0.053333 0.013333 0.030000 0.020000 0.079667 ... 0.051111 0.044444 0.021667 0.017143 0.041111 NaN NaN 0.020000 0.006667 NaN
2400 0.040000 0.036667 0.211111 0.067073 0.113333 0.143333 0.041111 NaN NaN 0.173333 ... 0.104545 0.063333 0.071667 0.036667 0.086667 NaN NaN 0.086667 0.030000 NaN
2700 0.026667 0.006667 0.204752 0.066667 0.093333 0.140000 0.030000 NaN 0.070000 0.172195 ... 0.108485 0.091667 0.066667 0.045000 0.086667 NaN NaN 0.063333 0.023333 NaN
3000 NaN NaN 0.266377 0.107051 0.140000 0.153333 0.073333 NaN NaN 0.226852 ... 0.151429 NaN 0.090000 0.106667 0.133333 NaN NaN NaN NaN NaN
3300 0.036667 0.013333 0.205507 0.063188 0.102222 NaN 0.043333 NaN 0.046667 0.165969 ... 0.106389 0.085000 0.068333 0.048333 0.100000 NaN NaN 0.056667 0.035000 NaN
3600 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

13 rows × 5584 columns

In [54]:
translation_rate_per_mrna.mean(axis=1, skipna=True)
Out[54]:
0       0.087966
300     0.091026
600     0.041258
900     0.043180
1200    0.044599
1500    0.027993
1800    0.028390
2100    0.029282
2400    0.073982
2700    0.074180
3000    0.109592
3300    0.070785
3600         NaN
dtype: float64
In [55]:
translation_rate_per_mrna.mean(axis=1, skipna=True).plot()
Out[55]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f456e2dd210>

Confirms the hypothesis that something is limiting during S phase.

7. Translational efficiency by initiation probability

Now we want to explode the previous graph by initiation probability.

Load unadjusted initiation probabilities:

In [56]:
init_rates = pkl.load(open("../parameters/init_rates_plotkin.p", "rb"))

Frequency distribution of initiation rates:

In [57]:
# Histogram of initiation rates on logarithmically spaced bins (50 bins
# between the observed min and max), drawn on a log-scaled x axis.
plt.hist(init_rates.values(), bins = 10**np.linspace(np.log10(min(init_rates.values())), 
                                                              np.log10(max(init_rates.values())), num=50))
plt.gca().set_xscale("log")
plt.gca().set_xlim([min(init_rates.values()), max(init_rates.values())])
plt.gca().set_xlabel('initiation rates [1/s]')
plt.show()

Define translation rate as the average number of proteins produced per second over a 300 s interval:

In [58]:
translation_rate = {}

# Translation rate per gene: mean proteins/s over the last 300 s of each
# simulation window, for every gene with a known initiation rate; genes
# without a timecourse at a given time get rate 0.
for gene in init_rates:
    translation_rate[gene] = []
    for time in np.arange(0, 3600, 300):
        if gene in results[time]['timecourses']:
            # protein count gained over the last 300 s, divided by 300 s
            rate = float(results[time]['timecourses'][gene][-1]-results[time]['timecourses'][gene][-301])/300.
        else:
            rate = 0.
        translation_rate[gene].append(rate)

Convert to data frame:

In [59]:
translation_rate_df = pd.DataFrame(translation_rate)
translation_rate_df.index = np.arange(0, 3600, 300)
translation_rate_df['time'] = translation_rate_df.index
translation_rate_df
Out[59]:
YAL001C YAL002W YAL003W YAL007C YAL008W YAL009W YAL010C YAL011W YAL012W YAL013W ... YPR185W YPR186C YPR187W YPR188C YPR189W YPR190C YPR191W YPR198W YPR199C time
0 0.000000 0.006667 4.343333 0.093333 0.166667 0.040000 0.000000 0.000000 4.550000 0.110000 ... 0.000000 0.000000 0.633333 0.080000 0.063333 0.063333 0.093333 0.000000 0.000000 0
300 0.043333 0.000000 3.533333 0.113333 0.156667 0.000000 0.000000 0.000000 4.153333 0.110000 ... 0.053333 0.000000 0.540000 0.093333 0.066667 0.063333 0.123333 0.103333 0.000000 300
600 0.036667 0.013333 6.313333 0.343333 0.326667 0.030000 0.000000 0.023333 6.920000 0.126667 ... 0.013333 0.016667 0.820000 0.073333 0.060000 0.066667 0.140000 0.050000 0.000000 600
900 0.036667 0.016667 7.360000 0.290000 0.230000 0.033333 0.033333 0.063333 6.626667 0.183333 ... 0.020000 0.036667 0.846667 0.240000 0.073333 0.070000 0.076667 0.096667 0.013333 900
1200 0.033333 0.003333 7.493333 0.270000 0.156667 0.050000 0.000000 0.036667 6.100000 0.206667 ... 0.020000 0.033333 1.116667 0.180000 0.020000 0.203333 0.116667 0.033333 0.020000 1200
1500 0.020000 0.016667 9.500000 0.280000 0.230000 0.086667 0.000000 0.016667 6.230000 0.133333 ... 0.026667 0.083333 0.886667 0.146667 0.080000 0.043333 0.100000 0.053333 0.010000 1500
1800 0.036667 0.020000 9.920000 0.310000 0.150000 0.106667 0.013333 0.056667 7.006667 0.163333 ... 0.020000 0.036667 0.800000 0.100000 0.076667 0.076667 0.086667 0.046667 0.016667 1800
2100 0.020000 0.040000 10.583333 0.346667 0.106667 0.053333 0.030000 0.040000 7.170000 0.180000 ... 0.013333 0.020000 1.073333 0.133333 0.086667 0.120000 0.123333 0.040000 0.006667 2100
2400 0.040000 0.036667 5.700000 0.226667 0.143333 0.123333 0.000000 0.000000 5.720000 0.196667 ... 0.000000 0.070000 1.150000 0.063333 0.143333 0.036667 0.086667 0.086667 0.030000 2400
2700 0.026667 0.006667 9.623333 0.093333 0.140000 0.030000 0.000000 0.070000 7.060000 0.113333 ... 0.036667 0.000000 1.193333 0.183333 0.066667 0.090000 0.173333 0.063333 0.023333 2700
3000 0.000000 0.000000 6.126667 0.140000 0.153333 0.073333 0.000000 0.000000 4.083333 0.126667 ... 0.060000 0.000000 1.060000 0.000000 0.090000 0.106667 0.133333 0.000000 0.000000 3000
3300 0.036667 0.013333 9.453333 0.306667 0.000000 0.043333 0.000000 0.046667 7.136667 0.193333 ... 0.020000 0.046667 1.276667 0.170000 0.136667 0.096667 0.100000 0.056667 0.070000 3300

12 rows × 4840 columns

Convert initiation rates to data frame:

In [60]:
init_rates_df = pd.DataFrame.from_dict(init_rates, orient='index')
init_rates_df.columns = ['init_rates']
init_rates_df['init_rates'] *= 1000000

Define exponential bins for bucketing:

In [61]:
bins = 10**np.linspace(np.log10(min(init_rates_df['init_rates'])), np.log10(max(init_rates_df['init_rates'])), num=20)
bins
Out[61]:
array([  9.37576635e-04,   1.55722534e-03,   2.58640273e-03,
         4.29576818e-03,   7.13486112e-03,   1.18503236e-02,
         1.96822570e-02,   3.26903511e-02,   5.42955544e-02,
         9.01797358e-02,   1.49779938e-01,   2.48770187e-01,
         4.13183547e-01,   6.86258453e-01,   1.13980982e+00,
         1.89311537e+00,   3.14428401e+00,   5.22235577e+00,
         8.67383470e+00,   1.44064119e+01])
In [62]:
# Long-format table: one row per (gene, time) with its translation rate and
# initiation rate, joined on the gene id (init_rates_df index vs. melted 'variable').
translation_rate_df_long = pd.merge(init_rates_df, pd.melt(translation_rate_df, id_vars=['time']), 
                                    left_index=True, right_on='variable')
In [63]:
scatter_dicts(translation_rate_df_long['init_rates'].to_dict(), translation_rate_df_long['value'].to_dict(), 
              logx=False, logy=False)
58068

Not interesting.

Cut data frame into bins:

In [64]:
translation_rate_df_long['bin'] = pd.cut(translation_rate_df_long.init_rates, bins)
translation_rate_df_long.head()
Out[64]:
init_rates time variable value bin
3564 1.644214 0 YBR177C 0.076667 (1.14, 1.893]
3565 1.644214 300 YBR177C 0.113333 (1.14, 1.893]
3566 1.644214 600 YBR177C 0.000000 (1.14, 1.893]
3567 1.644214 900 YBR177C 0.116667 (1.14, 1.893]
3568 1.644214 1200 YBR177C 0.100000 (1.14, 1.893]
In [65]:
translation_rate_df_long_grouped = translation_rate_df_long.groupby(['bin', 'time'])
translation_rate_df_long_averaged = translation_rate_df_long_grouped.mean()

Remember this is a hierarchical column index:

In [66]:
translation_rate_by_IP_by_time = translation_rate_df_long_averaged.unstack()['value']
/home/martin/.local/lib/python2.7/site-packages/pandas/core/index.py:4281: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  return np.sum(name == np.asarray(self.names)) > 1
In [67]:
xpos = np.arange(translation_rate_by_IP_by_time.shape[0])
ypos = np.arange(translation_rate_by_IP_by_time.shape[1])
yposM, xposM = np.meshgrid(ypos + 0.5, xpos + 0.5)
zpos = np.zeros(translation_rate_by_IP_by_time.shape).flatten()
In [68]:
dx = 0.5 * np.ones_like(zpos)
dy = 0.1 * np.ones_like(zpos)
dz = translation_rate_by_IP_by_time.values.ravel()
In [69]:
from mpl_toolkits.mplot3d import Axes3D
# 3D bar chart: mean translation rate (z) per initiation-rate bin (x) and time (y).
fig = plt.figure(figsize=(12, 9))
ax = fig.add_subplot(111, projection='3d')

#values = np.linspace(0.2, 1., xposM.ravel().shape[0])
#colors = cm.rainbow(values)

ax.bar3d(xposM.ravel(), yposM.ravel(), zpos, dx, dy, dz, #color=colors, 
         alpha=0.5)
Out[69]:
<mpl_toolkits.mplot3d.art3d.Poly3DCollection at 0x7f457a01ae90>

Too ugly. We try a (first: linear) heatmap instead.

In [70]:
# Linear-scale heatmap of mean translation rate per initiation-rate bin over time.
df = translation_rate_by_IP_by_time

plt.pcolor(df, vmin=0, vmax=2.5)
# set the limits of the plot to the limits of the data
#plt.axis([x.min(), x.max(), y.min(), y.max()])
plt.yticks(np.arange(0.5, len(df.index), 1), df.index)  # initiation-rate bins on y
plt.xticks(np.arange(0.5, len(df.columns), 1), df.columns, rotation='vertical')  # times on x
plt.show()

Logarithmic heatmap:

In [71]:
df = np.log(1+translation_rate_by_IP_by_time)

plt.pcolor(df, vmin=0, vmax=2.5)
# set the limits of the plot to the limits of the data
#plt.axis([x.min(), x.max(), y.min(), y.max()])
plt.yticks(np.arange(0.5, len(df.index), 1), df.index)
plt.xticks(np.arange(0.5, len(df.columns), 1), df.columns, rotation='vertical')
plt.show()

Try again with non-exponential bins:

In [72]:
bins = np.linspace(min(init_rates_df['init_rates']), max(init_rates_df['init_rates']), num=20)
bins
Out[72]:
array([  9.37576635e-04,   7.59120437e-01,   1.51730330e+00,
         2.27548616e+00,   3.03366902e+00,   3.79185188e+00,
         4.55003474e+00,   5.30821760e+00,   6.06640046e+00,
         6.82458332e+00,   7.58276618e+00,   8.34094904e+00,
         9.09913190e+00,   9.85731476e+00,   1.06154976e+01,
         1.13736805e+01,   1.21318633e+01,   1.28900462e+01,
         1.36482291e+01,   1.44064119e+01])
In [73]:
translation_rate_df_long['bin'] = pd.cut(translation_rate_df_long.init_rates, bins)
translation_rate_df_long.head()
Out[73]:
init_rates time variable value bin
3564 1.644214 0 YBR177C 0.076667 (1.517, 2.275]
3565 1.644214 300 YBR177C 0.113333 (1.517, 2.275]
3566 1.644214 600 YBR177C 0.000000 (1.517, 2.275]
3567 1.644214 900 YBR177C 0.116667 (1.517, 2.275]
3568 1.644214 1200 YBR177C 0.100000 (1.517, 2.275]
In [74]:
translation_rate_df_long_grouped = translation_rate_df_long.groupby(['bin', 'time'])
translation_rate_df_long_averaged = translation_rate_df_long_grouped.mean()
In [75]:
translation_rate_by_IP_by_time = translation_rate_df_long_averaged.unstack()['value']
In [76]:
xpos = np.arange(translation_rate_by_IP_by_time.shape[0])
ypos = np.arange(translation_rate_by_IP_by_time.shape[1])
yposM, xposM = np.meshgrid(ypos + 0.5, xpos + 0.5)
zpos = np.zeros(translation_rate_by_IP_by_time.shape).flatten()
In [77]:
df = translation_rate_by_IP_by_time

plt.pcolor(df, vmin=0, vmax=2.5)
# set the limits of the plot to the limits of the data
#plt.axis([x.min(), x.max(), y.min(), y.max()])
plt.yticks(np.arange(0.5, len(df.index), 1), df.index)
plt.xticks(np.arange(0.5, len(df.columns), 1), df.columns, rotation='vertical')
plt.show()
In [78]:
df = np.log(1+translation_rate_by_IP_by_time)

plt.pcolor(df, vmin=0, vmax=2.5)
# set the limits of the plot to the limits of the data
#plt.axis([x.min(), x.max(), y.min(), y.max()])
plt.yticks(np.arange(0.5, len(df.index), 1), df.index)
plt.xticks(np.arange(0.5, len(df.columns), 1), df.columns, rotation='vertical')
plt.show()